import time
from random import random
from random import uniform

import requests
from bs4 import BeautifulSoup

# Scrape the Douban Top 250 movie list.
def fetch_douban_movie():
    """Print the titles of the Douban Top 250 movies, page by page.

    Requests ten list pages (offsets 0, 25, ..., 225) from
    movie.douban.com, parses each response with BeautifulSoup's built-in
    ``html.parser`` and prints the text of the first ``span.title`` found in
    every ``div.item`` entry. After each page the function sleeps for a
    random 1-3 seconds to throttle the request rate.

    Nothing is returned and no exception propagates: a failed HTTP request
    (including a timeout or a non-2xx status) or any other error is printed
    to stdout and stops the crawl at the current page.
    """
    # Request headers: send a browser-like User-Agent with every request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    page_size = 25
    try:
        # Walk the list one page (page_size entries) at a time.
        for s in range(0, 250, page_size):
            print(f"第{s // page_size + 1}页数据：")

            # 1. Send the HTTP request. The offset is a query parameter, so
            #    it has to be separated from the path by "?".
            url = f'https://movie.douban.com/top250?start={s}'
            # The timeout keeps a stalled connection from hanging the crawl;
            # a timeout surfaces as a RequestException handled below.
            response = requests.get(url, headers=headers, timeout=10)

            # 2. Raise for 4xx/5xx responses instead of parsing an error page.
            response.raise_for_status()

            # 3. Parse the HTML; `soup` is the whole parsed document tree.
            soup = BeautifulSoup(response.text, 'html.parser')

            # Every movie entry lives in a <div class="item">.
            for item in soup.find_all('div', class_='item'):
                title_tag = item.find('span', class_='title')
                if title_tag is None:
                    # Skip an entry without a title instead of aborting the
                    # whole crawl with an AttributeError.
                    continue
                # TODO: rank and rating are not extracted yet.
                print(title_tag.text)

            # Throttle the request rate with a random pause between pages.
            time.sleep(uniform(1, 3))

    except requests.exceptions.RequestException as e:
        print('失败', e)
    except Exception as e:
        # Top-level boundary of the script: report the error and stop.
        print(e)

# Start the crawl only when this file is executed directly as a script.
if __name__ == "__main__":
    fetch_douban_movie()